import scrapy
from scrapy_test.items import MusicItem

class KugouSpider(scrapy.Spider):
    """Spider that collects playlist descriptions and play counts from Kugou.

    A single search-page request is issued by ``start_requests`` and the
    response is handled by ``parse``, which yields one ``MusicItem`` per
    result row.
    """

    name = "kugou"
    allowed_domains = ["kugou.com"]
    # Kept for compatibility; ``start_requests`` below supplies the actual
    # request, so this list is not what drives the crawl.
    start_urls = ["https://www.kugou.com/"]

    def start_requests(self):
        """Yield the single search-page request that seeds the crawl.

        No callback is given, so the response is handled by ``parse``.
        """
        # NOTE(review): the search parameters live in the URL fragment
        # (after '#'), which is not sent to the server. The result list may
        # be rendered client-side -- confirm the downloaded HTML actually
        # contains the "special_list" markup.
        yield scrapy.Request(url='https://www.kugou.com/yy/html/search.html#searchType=special&searchKeyWord=%E8%AF%84%E4%B9%A6')

    def parse(self, response):
        """Extract one ``MusicItem`` per result row of the search page.

        Each ``<li>`` of the second ``<ul>`` inside ``div.special_list`` is
        processed on its own, so the description and the play count always
        come from the same row. Rows where either field is empty after
        whitespace normalisation are logged and skipped; the remaining rows
        are still processed.

        Args:
            response: The Scrapy response for the search page.

        Yields:
            MusicItem: with ``desc`` and ``people`` set to the cleaned text.
        """
        rows = response.xpath('//div[@class="special_list"]/ul[2]/li')
        for row in rows:
            # Relative XPaths keep both fields tied to the current row, even
            # when a field is split across several text nodes or is missing.
            desc_parts = row.xpath('./div[1]/a[2]//text()').getall()
            people_parts = row.xpath('./div[3]/text()').getall()

            # Strip leading/trailing whitespace and collapse internal runs
            # of whitespace into single spaces.
            cleaned_desc = ' '.join(''.join(desc_parts).split())
            cleaned_people = ' '.join(''.join(people_parts).split())

            # Validate after cleaning so whitespace-only values are caught,
            # and skip just this row rather than aborting the whole page.
            if not cleaned_desc or not cleaned_people:
                self.logger.warning(
                    "Invalid args: desc=%r, people=%r",
                    cleaned_desc,
                    cleaned_people,
                )
                continue

            item = MusicItem()
            item['desc'] = cleaned_desc
            item['people'] = cleaned_people
            yield item